/*
Copyright (c) 2024 Bytedance Ltd. and/or its affiliates
This file is part of ByteQC.

ByteQC includes code adapted from PySCF (https://github.com/pyscf/pyscf,
https://github.com/hongzhouye/pyscf/tree/rsdf_direct),
which is licensed under the Apache License 2.0. The original copyright:
    Copyright 2014-2020 The PySCF Developers. All Rights Reserved.

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.

    Author: Qiming Sun <osirpt.sun@gmail.com>
    Author: Hong-Zhou Ye <hzyechem@gmail.com>

*/

#include <stdlib.h>
#include "config.h"

// HAVE_DEFINED_CINTOPT_H is defined in cint.h libcint v5.0. The CINTEnvVars
// struct definition below is not the same to the one in cint.h . However, it
// is compatible with both libcint v4 and libcint v5.
#ifndef HAVE_DEFINED_CINTENVVARS_H
#define HAVE_DEFINED_CINTENVVARS_H
typedef struct {
    int *atm;
    int *bas;
    double *env;
    int *shls;
    int natm;
    int nbas;

    int i_l;
    int j_l;
    int k_l;
    int l_l;
    int nfi; // number of cartesion components
    int nfj;
    int nfk;
    int nfl;
    int nf; // = nfi*nfj*nfk*nfl;
    int _padding;
    int x_ctr[4];

    int gbits;
    int ncomp_e1;     // = 1 if spin free, = 4 when spin included, it
    int ncomp_e2;     // corresponds to POSX,POSY,POSZ,POS1, see cint_const.h
    int ncomp_tensor; // e.g. = 3 for gradients

    /* values may diff based on the g0_2d4d algorithm */
    int li_ceil; // power of x, == i_l if nabla is involved, otherwise == i_l
    int lj_ceil;
    int lk_ceil;
    int ll_ceil;
    int g_stride_i; // nrys_roots * shift of (i++,k,l,j)
    int g_stride_k; // nrys_roots * shift of (i,k++,l,j)
    int g_stride_l; // nrys_roots * shift of (i,k,l++,j)
    int g_stride_j; // nrys_roots * shift of (i,k,l,j++)
    int nrys_roots;
    int g_size; // ref to cint2e.c g = malloc(sizeof(double)*g_size)

    int g2d_ijmax;
    int g2d_klmax;
    double common_factor;
    // The next four words are _padding1 and rirj[3] in libcint/qcint.
    // Replace them with four words (ai, aj, ak, al)
    // double _padding1;
    // double rirj[3];
    double ai[1];
    double aj[1];
    double ak[1];
    double al[1];
    double rkrl[3];
    double *rx_in_rijrx;
    double *rx_in_rklrx;

    double *ri;
    double *rj;
    double *rk;
    double *rl;

    void (*f_gout)();

    // Other definitions in CINTEnvVars are different in libcint and qcint.
    // They should not be used in this function.
} CINTEnvVars;
#endif

#define EXP_CUTOFF 100
#undef I

__device__ constexpr int _LEN_CART[] = {
        1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105, 120, 136
};
__device__ constexpr int _CUM_LEN_CART[] = {
        1, 4, 10, 20, 35, 56, 84, 120, 165, 220, 286, 364, 455, 560, 680, 816,
};
__device__ constexpr int _GBUFSIZE[] = {
        1, 4, 10, 10, 20, 48, 20, 35, 75, 150, 35, 56, 108, 216, 384,
        56, 84, 147, 294, 510, 850, 84, 120, 192, 384, 654, 1090, 1640,
        120, 165, 243, 486, 816, 1360, 2040, 3030
};

__device__ constexpr int _UPIDY[] = {
        1,
        3, 4,
        6, 7, 8,
        10, 11, 12, 13,
        15, 16, 17, 18, 19,
        21, 22, 23, 24, 25, 26,
        28, 29, 30, 31, 32, 33, 34,
        36, 37, 38, 39, 40, 41, 42, 43,
        45, 46, 47, 48, 49, 50, 51, 52, 53,
        55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
        78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
        91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,
       105,106,107,108,109,110,111,112,113,114,115,116,117,118,
       120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,
};
__device__ constexpr int _UPIDZ[] = {
        2,
        4, 5,
        7, 8, 9,
        11, 12, 13, 14,
        16, 17, 18, 19, 20,
        22, 23, 24, 25, 26, 27,
        29, 30, 31, 32, 33, 34, 35,
        37, 38, 39, 40, 41, 42, 43, 44,
        46, 47, 48, 49, 50, 51, 52, 53, 54,
        56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
        67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
        79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
        92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,
       106,107,108,109,110,111,112,113,114,115,116,117,118,119,
       121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,
};
/*
 * _DOWN_XYZ, _DOWN_XYZ_ORDER, _DOWN1, _DOWN2 labels the index in the 1D
 * recursive relation f_{i+1} = i/2a * f_{i-1} + X * f_{i}
 * _DOWN_XYZ_ORDER      i in i/2a
 * _DOWN2               index of f_{i-1}
 * _DOWN_XYZ            index of X
 * _DOWN1               index of f_{i}
 */
__device__ constexpr int _DOWN1[] = {
-1,
0, 0, 0,
0, 1, 2, 1, 2, 2,
0, 0, 0, 3, 4, 5, 3, 3, 5, 5,
0, 0, 0, 3, 2, 5, 6, 7, 8, 9, 6, 6, 8, 9, 9,
0, 0, 0, 3, 2, 5, 6, 3, 5, 9, 10, 11, 12, 13, 14, 10, 10, 12, 13, 14, 14,
0, 0, 0, 3, 2, 5, 6, 3, 5, 9, 10, 6, 12, 9, 14, 15, 16, 17, 18, 19, 20, 15, 15, 17, 18, 19, 20, 20,
0, 0, 0, 3, 2, 5, 6, 3, 5, 9, 10, 6, 12, 9, 14, 15, 10, 17, 18, 14, 20, 21, 22, 23, 24, 25, 26, 27, 21, 21, 23, 24, 25, 26, 27, 27,
0, 0, 0, 3, 2, 5, 6, 3, 5, 9, 10, 6, 12, 9, 14, 15, 10, 17, 18, 14, 20, 21, 15, 23, 24, 25, 20, 27, 28, 29, 30, 31, 32, 33, 34, 35, 28, 28, 30, 31, 32, 33, 34, 35, 35,
0, 0, 0, 3, 2, 5, 6, 3, 5, 9, 10, 6, 12, 9, 14, 15, 10, 17, 18, 14, 20, 21, 15, 23, 24, 25, 20, 27, 28, 21, 30, 31, 32, 33, 27, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 36, 36, 38, 39, 40, 41, 42, 43, 44, 44,
0, 0, 0, 3, 2, 5, 6, 3, 5, 9, 10, 6, 12, 9, 14, 15, 10, 17, 18, 14, 20, 21, 15, 23, 24, 25, 20, 27, 28, 21, 30, 31, 32, 33, 27, 35, 36, 28, 38, 39, 40, 41, 42, 35, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 45, 45, 47, 48, 49, 50, 51, 52, 53, 54, 54,
0, 0, 0, 3, 2, 5, 6, 3, 5, 9, 10, 6, 12, 9, 14, 15, 10, 17, 18, 14, 20, 21, 15, 23, 24, 25, 20, 27, 28, 21, 30, 31, 32, 33, 27, 35, 36, 28, 38, 39, 40, 41, 42, 35, 44, 45, 36, 47, 48, 49, 50, 51, 52, 44, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 55, 55, 57, 58, 59, 60, 61, 62, 63, 64, 65, 65,
0, 0, 0, 3, 2, 5, 6, 3, 5, 9, 10, 6, 12, 9, 14, 15, 10, 17, 18, 14, 20, 21, 15, 23, 24, 25, 20, 27, 28, 21, 30, 31, 32, 33, 27, 35, 36, 28, 38, 39, 40, 41, 42, 35, 44, 45, 36, 47, 48, 49, 50, 51, 52, 44, 54, 55, 45, 57, 58, 59, 60, 61, 62, 63, 54, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 66, 66, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 77,
0, 0, 0, 3, 2, 5, 6, 3, 5, 9, 10, 6, 12, 9, 14, 15, 10, 17, 18, 14, 20, 21, 15, 23, 24, 25, 20, 27, 28, 21, 30, 31, 32, 33, 27, 35, 36, 28, 38, 39, 40, 41, 42, 35, 44, 45, 36, 47, 48, 49, 50, 51, 52, 44, 54, 55, 45, 57, 58, 59, 60, 61, 62, 63, 54, 65, 66, 55, 68, 69, 70, 71, 72, 73, 74, 75, 65, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 78, 78, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 90,
0, 0, 0, 3, 2, 5, 6, 3, 5, 9, 10, 6, 12, 9, 14, 15, 10, 17, 18, 14, 20, 21, 15, 23, 24, 25, 20, 27, 28, 21, 30, 31, 32, 33, 27, 35, 36, 28, 38, 39, 40, 41, 42, 35, 44, 45, 36, 47, 48, 49, 50, 51, 52, 44, 54, 55, 45, 57, 58, 59, 60, 61, 62, 63, 54, 65, 66, 55, 68, 69, 70, 71, 72, 73, 74, 75, 65, 77, 78, 66, 80, 81, 82, 83, 84, 85, 86, 87, 88, 77, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 91, 91, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 104,
0, 0, 0, 3, 2, 5, 6, 3, 5, 9, 10, 6, 12, 9, 14, 15, 10, 17, 18, 14, 20, 21, 15, 23, 24, 25, 20, 27, 28, 21, 30, 31, 32, 33, 27, 35, 36, 28, 38, 39, 40, 41, 42, 35, 44, 45, 36, 47, 48, 49, 50, 51, 52, 44, 54, 55, 45, 57, 58, 59, 60, 61, 62, 63, 54, 65, 66, 55, 68, 69, 70, 71, 72, 73, 74, 75, 65, 77, 78, 66, 80, 81, 82, 83, 84, 85, 86, 87, 88, 77, 90, 91, 78, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 90, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 105, 105, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 119,
};
__device__ constexpr int _DOWN2[] = {
-1,
-1, -1, -1,
0, -1, -1, 0, -1, 0,
0, -1, -1, -1, -1, -1, 1, -1, -1, 2,
0, -1, -1, 3, -1, 5, -1, -1, -1, -1, 3, -1, 5, -1, 5,
0, -1, -1, 3, -1, 5, 6, -1, -1, 9, -1, -1, -1, -1, -1, 6, -1, 8, 9, -1, 9,
0, -1, -1, 3, -1, 5, 6, -1, -1, 9, 10, -1, 12, -1, 14, -1, -1, -1, -1, -1, -1, 10, -1, 12, 13, 14, -1, 14,
0, -1, -1, 3, -1, 5, 6, -1, -1, 9, 10, -1, 12, -1, 14, 15, -1, 17, 18, -1, 20, -1, -1, -1, -1, -1, -1, -1, 15, -1, 17, 18, 19, 20, -1, 20,
0, -1, -1, 3, -1, 5, 6, -1, -1, 9, 10, -1, 12, -1, 14, 15, -1, 17, 18, -1, 20, 21, -1, 23, 24, 25, -1, 27, -1, -1, -1, -1, -1, -1, -1, -1, 21, -1, 23, 24, 25, 26, 27, -1, 27,
0, -1, -1, 3, -1, 5, 6, -1, -1, 9, 10, -1, 12, -1, 14, 15, -1, 17, 18, -1, 20, 21, -1, 23, 24, 25, -1, 27, 28, -1, 30, 31, 32, 33, -1, 35, -1, -1, -1, -1, -1, -1, -1, -1, -1, 28, -1, 30, 31, 32, 33, 34, 35, -1, 35,
0, -1, -1, 3, -1, 5, 6, -1, -1, 9, 10, -1, 12, -1, 14, 15, -1, 17, 18, -1, 20, 21, -1, 23, 24, 25, -1, 27, 28, -1, 30, 31, 32, 33, -1, 35, 36, -1, 38, 39, 40, 41, 42, -1, 44, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 36, -1, 38, 39, 40, 41, 42, 43, 44, -1, 44,
0, -1, -1, 3, -1, 5, 6, -1, -1, 9, 10, -1, 12, -1, 14, 15, -1, 17, 18, -1, 20, 21, -1, 23, 24, 25, -1, 27, 28, -1, 30, 31, 32, 33, -1, 35, 36, -1, 38, 39, 40, 41, 42, -1, 44, 45, -1, 47, 48, 49, 50, 51, 52, -1, 54, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 45, -1, 47, 48, 49, 50, 51, 52, 53, 54, -1, 54,
0, -1, -1, 3, -1, 5, 6, -1, -1, 9, 10, -1, 12, -1, 14, 15, -1, 17, 18, -1, 20, 21, -1, 23, 24, 25, -1, 27, 28, -1, 30, 31, 32, 33, -1, 35, 36, -1, 38, 39, 40, 41, 42, -1, 44, 45, -1, 47, 48, 49, 50, 51, 52, -1, 54, 55, -1, 57, 58, 59, 60, 61, 62, 63, -1, 65, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 55, -1, 57, 58, 59, 60, 61, 62, 63, 64, 65, -1, 65,
0, -1, -1, 3, -1, 5, 6, -1, -1, 9, 10, -1, 12, -1, 14, 15, -1, 17, 18, -1, 20, 21, -1, 23, 24, 25, -1, 27, 28, -1, 30, 31, 32, 33, -1, 35, 36, -1, 38, 39, 40, 41, 42, -1, 44, 45, -1, 47, 48, 49, 50, 51, 52, -1, 54, 55, -1, 57, 58, 59, 60, 61, 62, 63, -1, 65, 66, -1, 68, 69, 70, 71, 72, 73, 74, 75, -1, 77, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 66, -1, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, -1, 77,
0, -1, -1, 3, -1, 5, 6, -1, -1, 9, 10, -1, 12, -1, 14, 15, -1, 17, 18, -1, 20, 21, -1, 23, 24, 25, -1, 27, 28, -1, 30, 31, 32, 33, -1, 35, 36, -1, 38, 39, 40, 41, 42, -1, 44, 45, -1, 47, 48, 49, 50, 51, 52, -1, 54, 55, -1, 57, 58, 59, 60, 61, 62, 63, -1, 65, 66, -1, 68, 69, 70, 71, 72, 73, 74, 75, -1, 77, 78, -1, 80, 81, 82, 83, 84, 85, 86, 87, 88, -1, 90, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 78, -1, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, -1, 90,
0, -1, -1, 3, -1, 5, 6, -1, -1, 9, 10, -1, 12, -1, 14, 15, -1, 17, 18, -1, 20, 21, -1, 23, 24, 25, -1, 27, 28, -1, 30, 31, 32, 33, -1, 35, 36, -1, 38, 39, 40, 41, 42, -1, 44, 45, -1, 47, 48, 49, 50, 51, 52, -1, 54, 55, -1, 57, 58, 59, 60, 61, 62, 63, -1, 65, 66, -1, 68, 69, 70, 71, 72, 73, 74, 75, -1, 77, 78, -1, 80, 81, 82, 83, 84, 85, 86, 87, 88, -1, 90, 91, -1, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, -1, 104, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 91, -1, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, -1, 104,
};
__device__ constexpr int _DOWN_XYZ[] = {
2,
0, 1, 2,
0, 0, 0, 1, 1, 2,
0, 1, 2, 0, 0, 0, 1, 2, 1, 2,
0, 1, 2, 0, 1, 0, 0, 0, 0, 0, 1, 2, 1, 1, 2,
0, 1, 2, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 2,
0, 1, 2, 0, 1, 0, 0, 2, 1, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 2,
0, 1, 2, 0, 1, 0, 0, 2, 1, 0, 0, 2, 0, 1, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 1, 2,
0, 1, 2, 0, 1, 0, 0, 2, 1, 0, 0, 2, 0, 1, 0, 0, 2, 0, 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 1, 1, 2,
0, 1, 2, 0, 1, 0, 0, 2, 1, 0, 0, 2, 0, 1, 0, 0, 2, 0, 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2,
0, 1, 2, 0, 1, 0, 0, 2, 1, 0, 0, 2, 0, 1, 0, 0, 2, 0, 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2,
0, 1, 2, 0, 1, 0, 0, 2, 1, 0, 0, 2, 0, 1, 0, 0, 2, 0, 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
0, 1, 2, 0, 1, 0, 0, 2, 1, 0, 0, 2, 0, 1, 0, 0, 2, 0, 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
0, 1, 2, 0, 1, 0, 0, 2, 1, 0, 0, 2, 0, 1, 0, 0, 2, 0, 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
0, 1, 2, 0, 1, 0, 0, 2, 1, 0, 0, 2, 0, 1, 0, 0, 2, 0, 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
0, 1, 2, 0, 1, 0, 0, 2, 1, 0, 0, 2, 0, 1, 0, 0, 2, 0, 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
};
__device__ constexpr int _DOWN_XYZ_ORDER[] = {
0,
0, 0, 0,
1, 0, 0, 1, 0, 1,
2, 0, 0, 0, 0, 0, 2, 0, 0, 2,
3, 0, 0, 1, 0, 1, 0, 0, 0, 0, 3, 0, 1, 0, 3,
4, 0, 0, 2, 0, 2, 1, 0, 0, 1, 0, 0, 0, 0, 0, 4, 0, 2, 1, 0, 4,
5, 0, 0, 3, 0, 3, 2, 0, 0, 2, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 5, 0, 3, 2, 1, 0, 5,
6, 0, 0, 4, 0, 4, 3, 0, 0, 3, 2, 0, 2, 0, 2, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 6, 0, 4, 3, 2, 1, 0, 6,
7, 0, 0, 5, 0, 5, 4, 0, 0, 4, 3, 0, 3, 0, 3, 2, 0, 2, 2, 0, 2, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 5, 4, 3, 2, 1, 0, 7,
8, 0, 0, 6, 0, 6, 5, 0, 0, 5, 4, 0, 4, 0, 4, 3, 0, 3, 3, 0, 3, 2, 0, 2, 2, 2, 0, 2, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 6, 5, 4, 3, 2, 1, 0, 8,
9, 0, 0, 7, 0, 7, 6, 0, 0, 6, 5, 0, 5, 0, 5, 4, 0, 4, 4, 0, 4, 3, 0, 3, 3, 3, 0, 3, 2, 0, 2, 2, 2, 2, 0, 2, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 7, 6, 5, 4, 3, 2, 1, 0, 9,
10, 0, 0, 8, 0, 8, 7, 0, 0, 7, 6, 0, 6, 0, 6, 5, 0, 5, 5, 0, 5, 4, 0, 4, 4, 4, 0, 4, 3, 0, 3, 3, 3, 3, 0, 3, 2, 0, 2, 2, 2, 2, 2, 0, 2, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 8, 7, 6, 5, 4, 3, 2, 1, 0, 10,
11, 0, 0, 9, 0, 9, 8, 0, 0, 8, 7, 0, 7, 0, 7, 6, 0, 6, 6, 0, 6, 5, 0, 5, 5, 5, 0, 5, 4, 0, 4, 4, 4, 4, 0, 4, 3, 0, 3, 3, 3, 3, 3, 0, 3, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 11,
12, 0, 0, 10, 0, 10, 9, 0, 0, 9, 8, 0, 8, 0, 8, 7, 0, 7, 7, 0, 7, 6, 0, 6, 6, 6, 0, 6, 5, 0, 5, 5, 5, 5, 0, 5, 4, 0, 4, 4, 4, 4, 4, 0, 4, 3, 0, 3, 3, 3, 3, 3, 3, 0, 3, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 12,
13, 0, 0, 11, 0, 11, 10, 0, 0, 10, 9, 0, 9, 0, 9, 8, 0, 8, 8, 0, 8, 7, 0, 7, 7, 7, 0, 7, 6, 0, 6, 6, 6, 6, 0, 6, 5, 0, 5, 5, 5, 5, 5, 0, 5, 4, 0, 4, 4, 4, 4, 4, 4, 0, 4, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 13,
14, 0, 0, 12, 0, 12, 11, 0, 0, 11, 10, 0, 10, 0, 10, 9, 0, 9, 9, 0, 9, 8, 0, 8, 8, 8, 0, 8, 7, 0, 7, 7, 7, 7, 0, 7, 6, 0, 6, 6, 6, 6, 6, 0, 6, 5, 0, 5, 5, 5, 5, 5, 5, 0, 5, 4, 0, 4, 4, 4, 4, 4, 4, 4, 0, 4, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 14,
};
#define bufsize(i, j)                                                          \
    _GBUFSIZE[((i >= j) ? (i * (i + 1) / 2 + j) : (j * (j + 1) / 2 + i))]
#define WHEREX_IF_L_INC1(i) i
#define WHEREY_IF_L_INC1(i) _UPIDY[i]
#define WHEREZ_IF_L_INC1(i) _UPIDZ[i]
#define STARTX_IF_L_DEC1(i) 0
#define STARTY_IF_L_DEC1(i) ((i < 2) ? 0 : _LEN_CART[i - 2])
#define STARTZ_IF_L_DEC1(i) (_LEN_CART[i - 1] - 1)
#define ADDR_IF_L_DEC1(l, m) _DOWN1[_CUM_LEN_CART[l - 1] + m]
#define ADDR_IF_L_DEC2(l, m) _DOWN2[_CUM_LEN_CART[l - 1] + m]
#define DEC1_XYZ(l, m) _DOWN_XYZ[_CUM_LEN_CART[l - 1] + m]
#define DEC1_XYZ_ORDER(l, m) _DOWN_XYZ_ORDER[_CUM_LEN_CART[l - 1] + m]

extern "C" {
void PBC_ft_bvk_drv(const int eval_gz, cuDoubleComplex *out, void *buf,
    size_t bufsize, int nkpts, int comp, int nimgs, int bvk_nimgs, double *d_Ls,
    cuDoubleComplex *d_expkL, int *shls_slice, int *ao_loc, int *d_cell_loc_bvk,
    int8_t *d_ovlp_mask, double *d_Gv, double *d_b, int *d_gxyz, int *gs,
    int nGv, int *atm, int natm, int *bas, int nbas, double *env);

void PBC_ft_latsum_drv(const int eval_gz, cuDoubleComplex *out, void *buf,
    size_t bufsize, int nkpts, int comp, int nimgs, double *d_Ls,
    cuDoubleComplex *d_expkL, int *shls_slice, int *ao_loc, double *d_Gv,
    double *d_b, int *d_gxyz, int *gs, int nGv, int *atm, int natm, int *bas,
    int nbas, double *env);
}

int PBCsizeof_env(const int *shls_slice, const int *atm, const int natm,
    const int *bas, const int nbas, const double *env);
template <int Fz>
__host__ __device__ void eval_gz(cuDoubleComplex *out, const double aij,
    const double *rij, const cuDoubleComplex fac, const double *Gv,
    const double *b, const int *gxyz, const int3 &gs, const size_t NGv,
    const size_t iGv);
